Load data from ~/Orthonectida/Projects/Biostatistic/Biostat_2022/scripts/CharityHospital_R_2022-11-02_1448.r
# Base vars:
base_vars.factor = c('gender.factor', 'age.group',
# Docs
'id_status.factor.reason', 'oms_status.factor.reason', 'sn_status.factor.reason',
# Additctions
'nicotin.factor', 'alcogolic.factor', 'narco.factor', 'ne_narco.factor')
base_vars.bool = c('id_status.factor.bool', 'oms_status.factor.bool', 'sn_status.factor.bool')
quant_vars = c('ObsNum', 'age.actual')
# Dummy variables:
dummy_vars_raw = c('Observation', 'Homeless',
'ds_icd_1.factor', 'ds_icd_2.factor', 'ds_icd_3.factor',
'etest_hiv.factor', 'etest_hbsag.factor', 'etest_hcv.factor', 'etest_lues.factor', 'etest_covid19.factor')
dummy_vars_already <- c('complaint_lite')
# Dinamic variables
dinamic_vars = c('Observation', 'Homeless')
## Transform BirthDate to actual age (26.11.2022)
data$age.actual[!is.na(data$date_bd)] <-
age_calc(as.Date(data$date_bd[!is.na(data$date_bd)]),
Sys.Date(),
units = 'years') %>%
floor
# Age group
data <-
data %>%
dplyr::mutate(
age.group = case_when(
age.actual < 18 ~ "<18 (несовершеннолетние)",
age.actual >= 18 & age.actual < 45 ~ "18-44 (молодой возраст)",
age.actual >= 45 & age.actual < 60 ~ "45-59 (средний возраст)",
age.actual >= 60 & age.actual < 75 ~ "60-74 (пожилой возраст)",
age.actual >= 75 ~ "75+ (старческий возраст)"
)
)
data$age.group <- factor(data$age.group, levels = sort(unique(data$age.group)))
# Dinamic vars (should be processed before dummy)
data_dinamic <-
data %>%
dplyr::select(record_id, dinamic_vars) %>%
dplyr::group_by(record_id) %>%
dplyr::summarise_all(function(x) paste(x, collapse = '|')) %>%
dplyr::rename_all(function(x) ifelse(x != 'record_id', paste0(x, '.dinamic'), x)) %>%
dplyr::mutate_at(vars(ends_with('.dinamic')),
function(x) first_last_dinamics(x) %>% replace_na('Нет данных') %>% as.factor)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(dinamic_vars)
##
## # Now:
## data %>% select(all_of(dinamic_vars))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
# Renew names of dinamic vars with dinamic suffix
dinamic_vars = paste0(dinamic_vars, '.dinamic')
# Proccess dummy vars (drop original dummy vars)
data_dummy <-
data %>%
dummy_cols(remove_first_dummy = F, ignore_na = T,
select_columns = dummy_vars_raw) %>%
dplyr::select(-dummy_vars_raw) %>% # remove original vars
# Rename dummy vars already (for further join with original df)
dplyr::rename_at(vars(starts_with(dummy_vars_already)), ~paste0('.',.)) %>%
dplyr::select(record_id, starts_with(c(dummy_vars_raw, paste0('.', dummy_vars_already)), ignore.case = F)) %>% # select new vars + already dumm vars
dplyr::group_by(record_id) %>%
dplyr::summarise_all(function(x) sum(x, na.rm =T)) %>%
dplyr::ungroup() %>%
dplyr::select_if(function(x) sum(x, na.rm = T) != 0) # drop zero sum
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(dummy_vars_raw)
##
## # Now:
## data %>% select(all_of(dummy_vars_raw))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
# Add new dummied variables to list of quant vars
dummy_quant_vars = data_dummy %>% colnames %>% .[-1] # drom record_id
# Process quantative vars (among observation)
data_collapse <-
data %>%
dplyr::group_by(record_id) %>%
dplyr::summarise(ObsNum = ifelse(n() == 1, 1, n() - 1))
# Rowwise vars
data_rowwise <-
data %>%
dplyr::filter(is.na(Observation)) %>% # Keep only informative rows
dplyr::select(record_id) %>%
cbind(sum_rowwise_vars(data, 'ch_ds', 'отрицает')) # bind ID with rowwise vars
# Add rowwise new variables to quant
quant_vars = c(quant_vars, data_rowwise %>% dplyr::select(ends_with('.number')) %>% colnames)
selected_vars <- c(base_vars.bool, base_vars.factor, quant_vars, dummy_quant_vars, dinamic_vars)
##
data_done <-
data %>%
# filter out rows without base info
dplyr::filter(is.na(redcap_repeat_instrument)) %>%
# Join with summed dummy vars
right_join(data_dummy, by = 'record_id') %>%
# Join with quant vars
right_join(data_collapse, by = 'record_id') %>%
# Join with rowwise vars
right_join(data_rowwise, by = 'record_id') %>%
# Join with dinamic vars
right_join(data_dinamic, by = 'record_id') %>%
dplyr::select(record_id, all_of(selected_vars))
# record_id, gender.factor, age.group,
# Homeless.dinamic, id_status.factor.bool,
# oms_status.factor.bool, sn_status.factor.bool,
# ObsNum
ggGender1 <-
data_done %>%
dplyr::filter(!is.na(gender.factor) & !is.na(age.group)) %>%
dplyr::group_by(age.group, gender.factor) %>%
count %>%
ggplot(aes(n, fct_rev(age.group), fill = gender.factor )) +
geom_bar(stat = 'identity', position = 'dodge') +
labs(x = 'Patients', y = 'Age group', fill = 'Gender') +
scale_fill_manual(values = wes_palettes$Rushmore1[c(3,4)]) +
theme_bw() +
theme(axis.title = element_text(face = 'bold', size = 14),
legend.title = element_text(face = 'bold', size = 14),
axis.text = element_text(size = 12),
legend.text = element_text(size = 12))
ggGender1
ggGender2 <-
data_done %>%
dplyr::mutate(age.bins = cut(data_done$age.actual, 15),
count = 1) %>%
dplyr::select(age.bins, count, gender.factor) %>%
aggregate(count ~ gender.factor + age.bins, data = ., length) %>%
dplyr::mutate(count = ifelse(gender.factor == 'мужской', count * -1, count)) %>%
#Plot
ggplot(aes(age.bins, count, fill = gender.factor)) +
geom_bar(stat = 'identity') +
facet_share(~gender.factor, dir = 'h', scales = 'free', reverse_num = T) +
coord_flip() +
labs(y = 'Age', x = 'Patients', fill = 'Sex') +
scale_fill_manual(values = wes_palettes$Rushmore1[c(3,4)]) +
theme_bw() +
theme(axis.title = element_text(face = 'bold', size = 14),
legend.title = element_text(face = 'bold', size = 14),
axis.text = element_text(size = 12),
legend.text = element_text(size = 12),
strip.text.x = element_text(face = 'bold', size = 14))
ggGender2
data %>%
dplyr::filter(!is.na(redcap_repeat_instrument)) %>%
dplyr::select(record_id, redcap_repeat_instrument, Homeless) %>%
dplyr::summarise(Count = table(Homeless, exclude = NULL),
Homeless = names(table(Homeless, exclude = NULL))) %>%
dplyr::mutate(Homeless = case_when(is.na(Homeless) ~ 'нет данных',
T ~ Homeless)) %>%
# Plot
ggplot(aes(Count, fct_reorder(Homeless, Count) , fill = Homeless)) +
geom_bar(stat = 'identity') +
scale_fill_manual(values = wes_palettes$Rushmore1[2:5]) +
theme_bw() +
theme(axis.title = element_text(face = 'bold', size = 14),
legend.title = element_text(face = 'bold', size = 14),
axis.text = element_text(size = 12),
legend.text = element_text(size = 12),
legend.position = 'none') +
labs(x = 'Количество посещений', y = 'Тип бездомности')
## Don't know how to automatically pick scale for object of type <table>.
## Defaulting to continuous.
ggHome <-
data_done %>%
dplyr::filter(!is.na(Homeless.dinamic)) %>%
dplyr::group_by(Homeless.dinamic) %>%
count %>%
ggplot(aes(n, fct_reorder(Homeless.dinamic, desc(n)))) +
geom_bar(stat = 'identity') +
# Appereance
labs(x = 'Patients', fill = 'Age group', y = 'Home status') +
scale_fill_manual(values = rev(wes_palettes$Rushmore1)) +
theme_bw() +
theme(axis.title = element_text(face = 'bold', size = 14),
legend.title = element_text(face = 'bold', size = 14),
axis.text = element_text(size = 12),
legend.text = element_text(size = 12))
ggHome
# Parlament plot
df_count <-
data_done %>%
dplyr::filter(!is.na(Homeless.dinamic)) %>%
dplyr::group_by(Homeless.dinamic) %>%
count %>%
dplyr::mutate(Homeless.dinamic = as.character(Homeless.dinamic)) %>%
dplyr::arrange(desc(n))
# Plot
ggplot(df_count) +
geom_parliament(aes(seats = n, fill = Homeless.dinamic)) +
scale_fill_manual(values = c('darkgrey',
"black", "blue", "lightblue", "yellow",
"red","purple", "green",
'orange', 'tomato4'),
labels = df_count$Homeless.dinamic) +
coord_fixed() +
labs(fill = 'Тип бездомности') +
theme(axis.title = element_text(face = 'bold', size = 14),
legend.title = element_text(face = 'bold', size = 14),
axis.text = element_text(size = 12),
legend.text = element_text(size = 12)) +
theme_void()
## Warning: Using the `size` aesthetic in this geom was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` in the `default_aes` field and elsewhere instead.
data_done %>%
dplyr::select(Homeless.dinamic, nicotin.factor, alcogolic.factor, narco.factor, ne_narco.factor) %>%
# dplyr::filter(!is.na(age.group)) %>%
dplyr::group_by(Homeless.dinamic) %>%
dplyr::summarise(answer = names(table(alcogolic.factor)), # all factors - same value and order
Алко = table(alcogolic.factor),
Никотин = table(nicotin.factor),
Нарко = table(narco.factor),
# narco.factor = names(table(narco.factor)),
НеНарко = table(ne_narco.factor)
# ne_narco.factor = names(table(ne_narco.factor))
) %>%
dplyr::ungroup() %>%
melt %>%
dplyr::mutate(value = as.numeric(value)) %>%
dplyr::group_by(Homeless.dinamic) %>% dplyr::mutate(Sum = sum(value)) %>% dplyr::ungroup() %>%
ggplot(aes(value, answer, fill = Homeless.dinamic)) +
geom_bar(stat = 'identity') +
facet_grid(cols = vars(variable)) +
# Appereance
labs(x = 'Patients', fill = 'Home status', y = 'Answer') +
# scale_fill_manual(values = c(wes_palettes$BottleRocket2, wes_palettes$BottleRocket1)) +
theme_bw() +
theme(axis.title = element_text(face = 'bold', size = 14),
legend.title = element_text(face = 'bold', size = 14),
axis.text = element_text(size = 12),
legend.text = element_text(size = 12),
strip.text.x = element_text(face = 'bold', size = 14))
tmp = data$citizen.factor %>% table
countries.citizen <- tmp[tmp != 0] %>%
as.data.frame %>% setNames(c('Answer', 'Freq')) %>%
ggplot(aes(Freq, fct_reorder(Answer, Freq))) +
geom_bar(stat = 'identity', fill = wes_palette("Royal2")[3]) +
theme_bw() +
labs(x = 'Количество пациентов (по посещениям)', y = 'Страны (ответ со слов)') +
theme(axis.title = element_text(face = 'bold', size = 11),
legend.title = element_text(face = 'bold', size = 11),
axis.text = element_text(size = 10),
legend.text = element_text(size = 10))
ggplotly(countries.citizen)
countries.citizen
library('wesanderson')
library('ggplot2')
library('plotly')
icd_10 <- data %>%
dplyr::select(record_id, starts_with('ds_icd'), -ends_with('.factor')) %>%
as.data.table %>%
melt.data.table(id.vars = 'record_id', variable.name = "ds_icd", value.name = "illness_name") %>%
na.omit() %>%
as.data.frame %>%
transform(illness_name = factor(illness_name,
levels = c(1:19),
labels = c("A00-B99", "C00-D48", "D50-D89", "E00-E90", "F00-F99", "G0-G99", "H00-H59", "H65-H75", "I00-I99", "J00-J99", "K00-K93", "L55-L59", "M00-M99", "N00-N99", "O00-O99", "Q00-Q99", "S00-T98", "V01-Y98", "Z00-Z13")))
#icd_10$illness_name <- as.factor(icd_10$illness_name)
icd_10.stat <- ggplot(data = icd_10, aes(x = illness_name),
position="dodge") +
geom_bar(fill = wes_palette("Royal2")[5]) +
xlab('Заболевания по МКБ-10') +
ylab('Количество пациентов с данным заболеванием (по посещениям)') +
theme(legend.position="left") +
aes(stringr::str_wrap(icd_10$illness_name, 15)) + xlab(NULL) +
theme_minimal() +
coord_flip()
ggplotly(icd_10.stat)
## Warning: Use of `icd_10$illness_name` is discouraged.
## ℹ Use `illness_name` instead.
icd_10.stat
## Warning: Use of `icd_10$illness_name` is discouraged.
## ℹ Use `illness_name` instead.
#Создание общего столбца: icd; кодирование `Homeless` в систему 0,1,2; корреляционный анализ
#diamonds$factor_price <- ifelse(diamonds$price >= mean(diamonds$price),"1","0")
#diamonds$factor_carat <- ifelse(diamonds$carat >= mean(diamonds$carat),"1","0")
#diamods_pricaAndCarat <- table(diamonds$factor_price, diamonds$factor_carat)
#stat <- chisq.test(diamods_pricaAndCarat)
#main_stat <- c(stat$statistic)
#
#library(corrplot)
#insc_cor <- cor(insc_num)
#corrplot(insc_cor, order = 'AOE', col = COL2('RdBu', 10))
#corrplot(insc_cor, method = 'number')
icd10.homeless <- data %>%
dplyr::select(record_id, Homeless, starts_with('ds_')) %>%
dplyr::select(-ends_with('.factor')) %>%
as.data.table %>%
melt.data.table(id.vars = c('record_id', 'Homeless')) %>%
dplyr::mutate(value = as.factor(value)) %>%
dplyr::group_by(Homeless) %>%
dplyr::summarise(icd10 = factor(names(table(value)), 1:19),
Count = table(value)
) %>%
dplyr::mutate(Homeless = factor(Homeless, levels = c('уличный', 'условно уличный', 'домашний', 'нет данных'))) %>%
# Plot
ggplot(aes(icd10, Count)) +
facet_grid(rows = vars(Homeless)) +
geom_bar(stat = 'identity', fill = wes_palette("Royal1")[4])
## `summarise()` has grouped output by 'Homeless'. You can override using the
## `.groups` argument.
icd10.homeless
## Don't know how to automatically pick scale for object of type <table>.
## Defaulting to continuous.
Social diseases